import urllib.request
import sys
import os

# Crawl the weather.com.cn region index (province -> city -> district) and
# generate a ``city.py`` file, next to this script, that maps every district
# name to its final weather code.

# Top-level index: a comma-separated list of ``code|name`` province entries.
PROVINCE_INDEX_URL = 'http://m.weather.com.cn/data3/city.xml'
# Per-code listing; ``%s`` is replaced by a province, city or district code.
CHILD_INDEX_URL = 'http://m.weather.com.cn/data3/city%s.xml'


def fetch_text(url):
    """Download *url* and return the response body decoded as UTF-8.

    The response is closed as soon as the body has been read, so the crawl
    does not accumulate open connections.
    """
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8')


def parse_codes(listing):
    """Return the code of every ``code|name`` entry in a comma-separated listing."""
    return [entry.split('|')[0] for entry in listing.split(',')]


def parse_districts(listing):
    """Return ``(code, name)`` tuples for the usable district entries.

    Entries whose code is shorter than 6 characters are dropped (some
    district codes, e.g. ``06039``, have been withdrawn), as are entries
    that carry no name field.
    """
    districts = []
    for entry in listing.split(','):
        fields = entry.split('|')
        if len(fields[0]) < 6 or len(fields) < 2:
            continue
        districts.append((fields[0], fields[1]))
    return districts


def parse_final_code(response):
    """Return the final code from a ``district_code|final_code`` response.

    Returns ``None`` when the response has no ``|`` separator, so the caller
    can skip the district instead of aborting the whole crawl.
    """
    fields = response.split('|')
    return fields[1] if len(fields) > 1 else None


def render_city_module(pairs):
    """Return the source text of ``city.py`` for an iterable of ``(name, code)``.

    Values are written with ``%r`` so that quotes, backslashes or newlines in
    the downloaded text are escaped rather than corrupting the generated
    source. For plain names and codes the output is ``'name': 'code',``.
    """
    lines = ['city = {\n']
    lines.extend("    %r: %r,\n" % (name, code) for name, code in pairs)
    lines.append('}')
    return ''.join(lines)


def main():
    """Crawl the region index and write ``city.py`` next to this script."""
    pairs = []
    # 1. Fetch the province list; the first entry ("00|省") is a header.
    province_codes = parse_codes(fetch_text(PROVINCE_INDEX_URL))[1:]
    for province_code in province_codes:
        # 2. For each province, fetch its city list.
        city_codes = parse_codes(fetch_text(CHILD_INDEX_URL % province_code))
        for city_code in city_codes:
            # 3. For each city, fetch its district list.
            districts = parse_districts(fetch_text(CHILD_INDEX_URL % city_code))
            for district_code, name in districts:
                # 4. One more request per district yields its final code.
                response = fetch_text(CHILD_INDEX_URL % district_code)
                print(response)  # e.g. 010110|101011000
                code = parse_final_code(response)
                if code is None:
                    # Skip a malformed reply rather than lose the whole crawl.
                    print('skipping %s: unexpected response %r'
                          % (district_code, response), file=sys.stderr)
                    continue
                pairs.append((name, code))

    # Resolve the script directory to an absolute path: a bare dirname is ''
    # when the script is started by file name only, which would otherwise
    # turn the target into '/city.py'.
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    out_path = os.path.join(script_dir, 'city.py')
    # Python source files are UTF-8 by default, so write UTF-8 explicitly
    # instead of relying on the locale encoding.
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(render_city_module(pairs))


if __name__ == '__main__':
    main()